Environment

load R environment

library(tidyverse)
library(Seurat)

Load IPF Cell Atlas data

Spapros expects scanpy-format (AnnData) data with raw counts. Load the data with the established R workflow, then convert.

### Read in GSE136831

# Directory holding the GSE136831 (IPF Cell Atlas) files. The trailing slash
# matters: file paths are built with paste0(), not file.path().
original.dir <- "~/vcalab-files/datasets/GSE136831_IPF_Cell_Atlas/"

# Cell metadata table (gzipped TSV); includes a CellBarcode_Identity column.
metadata <- read_tsv(
  paste0(
    original.dir,
    "GSE136831_AllCells.Samples.CellType.MetadataTable.txt.gz"
  )
)
Rows: 312928 Columns: 9── Column specification ─────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (7): CellBarcode_Identity, CellType_Category, Manuscript_Identity, Subclass_Cell_Identity, Disease_Identi...
dbl (2): nUMI, nGene
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# read_tsv() returns a tibble; switch to a base data.frame so that row names
# can be set, then key each row by its cell barcode. The barcode column itself
# is kept in the table.
metadata <- as.data.frame(metadata)
rownames(metadata) <- metadata[["CellBarcode_Identity"]]

# Gene identifier table; its HGNC_EnsemblAlt_GeneID column is used later as
# the row names of the count matrix.
features <- read_tsv(
  paste0(original.dir, "GSE136831_AllCells.GeneIDs.txt.gz")
)
Rows: 45947 Columns: 2── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): Ensembl_GeneID, HGNC_EnsemblAlt_GeneID
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Cell barcode list. The file has no header row, so column names are disabled
# and readr names the single column X1. `FALSE` is spelled out because the
# shorthand `F` is an ordinary variable that can be reassigned.
cells <- read_tsv(
  paste0(original.dir, "GSE136831_AllCells.cellBarcodes.txt.gz"),
  col_names = FALSE
)
Rows: 312928 Columns: 1── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (1): X1
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the raw count matrix from its gzipped Matrix Market file.
counts <- Matrix::readMM(
  paste0(original.dir, "GSE136831_RawCounts_Sparse.mtx.gz")
)

# Label rows with gene identifiers and columns with cell barcodes.
dimnames(counts) <- list(features$HGNC_EnsemblAlt_GeneID, cells$X1)

# Build the Seurat object, attaching the barcode-keyed metadata table.
ipfatlas.cells <- CreateSeuratObject(counts = counts, meta.data = metadata)
Warning: Data is of class dgTMatrix. Coercing to dgCMatrix.
# Drop the intermediate objects now that they live inside the Seurat object,
# then run the garbage collector (its memory report is printed).
rm(list = c("counts", "cells", "features", "metadata"))
gc()
             used   (Mb) gc trigger    (Mb)   max used    (Mb)
Ncells    4287022  229.0    6682443   356.9    6682443   356.9
Vcells 1053056767 8034.2 3672764273 28021.0 4194266324 31999.8

Standard filters

# Per-cell percentage of counts from features whose names match "^MT-".
ipfatlas.cells[["percent.mt"]] <- PercentageFeatureSet(
  ipfatlas.cells,
  pattern = "^MT-"
)

# Violin plots of the three QC metrics, side by side, without individual points.
VlnPlot(
  ipfatlas.cells,
  features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
  ncol = 3,
  pt.size = 0
)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.

# Keep cells with more than 200 and fewer than 5000 detected features and
# less than 10% mitochondrial counts.
ipfatlas.cells <- subset(
  ipfatlas.cells,
  subset = nFeature_RNA > 200 &
    nFeature_RNA < 5000 &
    percent.mt < 10
)

Convert to scanpy compatible format (anndata)

This is, remarkably, not a solved problem, but sceasy has functions for it. We first have to back-convert the Seurat v5 object to a v3-like assay.

# Coerce the RNA assay from the Seurat v5 class (Assay5) to the older Assay
# class before handing the object to sceasy.
ipfatlas.cells[["RNA"]] <- as(
  object = ipfatlas.cells[["RNA"]],
  Class = "Assay"
)
Warning: No layers found matching search pattern providedWarning: No layers found matching search pattern providedWarning: No layers found matching search pattern providedWarning: Layer ‘data’ is emptyWarning: No layers found matching search pattern providedWarning: No layers found matching search pattern providedWarning: Layer ‘scale.data’ is emptyWarning: Assay RNA changing from Assay5 to Assay
# Write the Seurat object to disk as an AnnData (h5ad) file for the Python side.
# main_layer = "counts" exports the raw count matrix as X explicitly. sceasy's
# default is the "data" layer, which was never computed for this object, and
# the scanpy preprocessing downstream normalizes starting from raw counts.
sceasy::convertFormat(
  ipfatlas.cells,
  from = "seurat",
  to = "anndata",
  main_layer = "counts",
  outFile = "ipfatlas.adata"
)
Warning: Dropping single category variables:orig.ident
AnnData object with n_obs × n_vars = 253424 × 45947
    obs: 'nCount_RNA', 'nFeature_RNA', 'CellBarcode_Identity', 'nUMI', 'nGene', 'CellType_Category', 'Manuscript_Identity', 'Subclass_Cell_Identity', 'Disease_Identity', 'Subject_Identity', 'Library_Identity', 'percent.mt'
    var: 'name'

load python environment

reticulate behaves unpredictably when it has already been instantiated by other packages, so restart R before using Python.

# Restart the R session so the Python chunks start from a clean state.
# NOTE(review): `.rs.restartR()` looks like an RStudio-internal helper, so this
# presumably only works when run inside RStudio — confirm before running elsewhere.
.rs.restartR()
NULL

We use the Spapros workflow documented by the Theis lab, which is Python/scanpy based.

import pandas as pd
import scanpy as sc
import spapros as sp
# Set scanpy's logging verbosity to 0, then print the versions of scanpy and
# its main dependencies for the record.
sc.settings.verbosity = 0
sc.logging.print_header()
scanpy==1.9.6 anndata==0.10.3 umap==0.5.5 numpy==1.26.3 scipy==1.11.4 pandas==1.5.3 scikit-learn==1.1.3 statsmodels==0.14.1 igraph==0.9.11 pynndescent==0.5.11
# print_header() does not list spapros, so report its version separately.
print("spapros=={}".format(sp.__version__))
spapros==0.1.4

Access the AnnData object on the Python side. To avoid name conversions throughout the Python code, just call it “adata”. Passing the object directly from the R environment to Python behaved unpredictably, so it is stored as an h5ad file and read back in.

# Read the h5ad file written by sceasy on the R side and bind it to `adata`.
adata = sc.read_h5ad('ipfatlas.adata')
# Bare expression: displays the AnnData summary (dimensions, obs and var columns).
adata
AnnData object with n_obs × n_vars = 253424 × 45947
    obs: 'nCount_RNA', 'nFeature_RNA', 'CellBarcode_Identity', 'nUMI', 'nGene', 'CellType_Category', 'Manuscript_Identity', 'Subclass_Cell_Identity', 'Disease_Identity', 'Subject_Identity', 'Library_Identity', 'percent.mt'
    var: 'name'

SPAPROS probe selection

Preprocess data with scanpy

Filtering has already been done on the Seurat side.

# Normalize each cell to a total of 10,000 counts (the 10k target matches the
# default Seurat scale factor), then apply log1p.
sc.pp.normalize_total(adata, target_sum=10000)
sc.pp.log1p(adata)

# Flag the top 1000 highly variable genes using the "cell_ranger" flavor.
sc.pp.highly_variable_genes(
    adata,
    flavor="cell_ranger",
    n_top_genes=1000,
)

# lobpcg is used because arpack had issues on this data.
# NOTE(review): the captured warning for this call shows lobpcg exiting at
# iteration 20 without reaching the requested tolerance (worst residual about
# 17), so the trailing components may not be converged — verify the PCA before
# relying on it downstream.
sc.pp.pca(adata, svd_solver="lobpcg")
/home/vincent/.local/lib/python3.9/site-packages/scipy/sparse/linalg/_eigen/_svds.py:487: UserWarning: Exited at iteration 20 with accuracies 
[4.79425120e-08 4.33860979e-07 4.25535729e-08 1.19035951e-07
 4.12742959e-08 1.13385877e-07 7.08756943e-07 1.37830810e-06
 1.51369674e-07 5.47363570e-07 2.09337811e-06 3.11242987e-07
 1.04559607e-06 8.92120768e-08 2.06874692e-07 1.38487152e-07
 1.12949811e-06 7.70405333e-07 1.15730129e-06 2.44537505e-06
 4.45034612e-07 1.09552891e-06 2.38279297e-07 1.70460495e-06
 4.95124826e-07 4.02370540e-07 2.41765186e-06 6.03523887e-07
 2.19781845e-06 1.21796480e-06 5.84655170e-07 4.03826669e-06
 1.93481355e-06 2.18713447e-06 2.79229665e-06 6.98544531e-06
 1.46472059e-06 4.01444386e-06 3.26535748e-06 1.65977582e-05
 1.99434152e-05 1.90039855e-04 2.84961516e-04 8.43750002e-04
 5.89064658e-03 9.32999730e-03 8.68964326e-03 4.86836385e-02
 1.07320979e-01 1.69766674e+01]
not reaching the requested tolerance 1.4901161193847656e-05.
Use iteration 21 instead with accuracy 
0.3431597728816552.

  _, eigvec = lobpcg(XH_X, X, tol=tol ** 2, maxiter=maxiter,
/home/vincent/.local/lib/python3.9/site-packages/scipy/sparse/linalg/_eigen/_svds.py:487: UserWarning: Exited postprocessing with accuracies 
[4.81829694e-08 4.33156171e-07 4.15085166e-08 1.18443928e-07
 4.10329817e-08 1.13370684e-07 7.08724601e-07 1.37826510e-06
 1.51156757e-07 5.47290646e-07 2.09345190e-06 3.11020503e-07
 1.04560254e-06 8.90149173e-08 2.06879686e-07 1.38642587e-07
 1.12966102e-06 7.70430839e-07 1.15727319e-06 2.44537198e-06
 4.45026163e-07 1.09566625e-06 2.38218842e-07 1.70461422e-06
 4.95173291e-07 4.02321463e-07 2.41768558e-06 6.03567884e-07
 2.19774360e-06 1.21802253e-06 5.84547692e-07 4.03831660e-06
 1.93482576e-06 2.18726418e-06 2.79223102e-06 6.98547527e-06
 1.46476658e-06 4.01439911e-06 3.26532234e-06 1.65977890e-05
 1.99434840e-05 1.90039834e-04 2.84961507e-04 8.43750076e-04
 5.89064660e-03 9.32999723e-03 8.68964326e-03 4.86836385e-02
 1.07320979e-01 1.69766674e+01]
not reaching the requested tolerance 1.4901161193847656e-05.
  _, eigvec = lobpcg(XH_X, X, tol=tol ** 2, maxiter=maxiter,
# Build the neighborhood graph with 10 neighbors on the first 30 PCs.
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=30)
# Compute the UMAP embedding from that graph.
sc.tl.umap(adata)
# Sanity check: color the embedding by the broad cell-type annotation.
sc.pl.umap(adata, color=['CellType_Category'])
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
# Same embedding, colored by the Disease_Identity annotation.
sc.pl.umap(adata, color=['Disease_Identity'])
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(

SPAPROS basic: no constraints.

Asked for 200 genes, because one imagines that various stakeholders will add another 100 for IPF subsets, and probably asthma stakeholders will add another 100 bringing total to 400. Note that this dataset includes normal lung and COPD lung.

# Set up an unconstrained selection of a 200-gene panel, using the
# "Manuscript_Identity" annotation as the cell-type labels.
# save_dir=None: no results directory is configured.
selector = sp.se.ProbesetSelector(
    adata,
    n=200,
    celltype_key="Manuscript_Identity",
    verbosity=0,
    save_dir=None,
)
Note: The following celltypes' test set sizes for forest training are below min_test_n (=20):
     Ionocyte : 6
     PNEC     : 12
The genes selected for those cell types potentially don't generalize well. Find the genes for each of those cell types in self.genes_of_primary_trees after running self.select_probeset().
# Run the probe set selection. In the captured output this step emits many
# pandas PerformanceWarning messages originating in scanpy's rank_genes_groups.
selector.select_probeset()
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/joblib/externals/loky/process_executor.py:752: UserWarning: A worker stopped while some jobs were given to the executor. This can be caused by a too short worker timeout or by a memory leak.
  warnings.warn(

Export probes for simulations

# Write the rows of the probeset table flagged as selected to a CSV file
# (relative path, so it lands in the current working directory).
selector.probeset[selector.probeset.selection].to_csv('SPAPROS-basic200.csv')

Quick summary of selected probes

What is the distribution of chosen probes among methods?

# Plot the overlap of the selected genes (presumably broken down by selection
# method -- confirm against the spapros documentation).
selector.plot_gene_overlap()
/home/vincent/.local/lib/python3.9/site-packages/spapros/plotting/plot.py:1241: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.
  plt.tight_layout()

I don’t understand what the 4th column represents: are these 33 genes that were selected without being attributed to any selection method?

# List the names (index labels) of the probes whose `selection` flag is True
selector.probeset.index[selector.probeset.selection]
Index(['TPSB2', 'CCL21', 'GRP', 'EMP2', 'HYDIN', 'MIR205HG', 'TPM2', 'ZNF385D',
       'FCN3', 'S100B',
       ...
       'COL6A2', 'CXCL5', 'TIMP3', 'AGBL4', 'ADAM19', 'BATF', 'LIMCH1',
       'SKAP1', 'CTSW', 'ABLIM1'],
      dtype='object', length=200)

Table of characteristics for selected probes

# Subset the probeset table to the rows flagged as selected; this table is
# reused by the dot plot sections that follow.
probe_candidates = selector.probeset[selector.probeset.selection]
# Display the table of selected probes
probe_candidates
        gene_nr  selection  ...  required_marker  required_list_marker
TPSB2         1       True  ...             True                 False
CCL21         2       True  ...             True                 False
GRP           3       True  ...             True                 False
EMP2          4       True  ...             True                 False
HYDIN         5       True  ...             True                 False
...         ...        ...  ...              ...                   ...
BATF        196       True  ...            False                 False
LIMCH1      197       True  ...            False                 False
SKAP1       198       True  ...            False                 False
CTSW        199       True  ...            False                 False
ABLIM1      200       True  ...            False                 False

[200 rows x 17 columns]

Dotplots of probeset split by selection method

Cluster markers (DE 1vs all)

# Keep probes with a non-empty 'celltypes_DE_1vsall' entry, ordered by 'celltypes_DE'.
probes = probe_candidates[probe_candidates['celltypes_DE_1vsall'] != ''].sort_values('celltypes_DE')
# Dot plot of the first 40 of these probes, grouped by 'Manuscript_Identity'.
sc.pl.dotplot(
    adata,
    var_names=probes.index[:40],
    groupby='Manuscript_Identity',
    dendrogram=False,
)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)
# Dot plot of the remaining probes (position 40 onwards), same grouping.
sc.pl.dotplot(
    adata,
    var_names=probes.index[40:],
    groupby='Manuscript_Identity',
    dendrogram=False,
)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)

Nuanced cluster markers (celltypes_DE_specific)

# Keep probes with a non-empty 'celltypes_DE_specific' entry, ordered by 'celltypes_DE'.
probes = probe_candidates[probe_candidates['celltypes_DE_specific'] != ''].sort_values('celltypes_DE')
# Dot plot of the first 25 of these probes, grouped by 'Manuscript_Identity'.
sc.pl.dotplot(
    adata,
    var_names=probes.index[:25],
    groupby='Manuscript_Identity',
    dendrogram=False,
)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)
# Dot plot of the remaining probes (position 25 onwards), same grouping.
sc.pl.dotplot(
    adata,
    var_names=probes.index[25:],
    groupby='Manuscript_Identity',
    dendrogram=False,
)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)

PCA based (pca_selected)

# Keep probes whose 'pca_selected' flag is set, ordered by 'celltypes_DE'.
probes = probe_candidates[probe_candidates['pca_selected']].sort_values('celltypes_DE')
# Dot plot of the first 33 of these probes, grouped by 'Manuscript_Identity'.
sc.pl.dotplot(
    adata,
    var_names=probes.index[:33],
    groupby='Manuscript_Identity',
    dendrogram=False,
)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)
# Dot plot of the next 33 probes (positions 33 to 65), same grouping.
sc.pl.dotplot(
    adata,
    var_names=probes.index[33:66],
    groupby='Manuscript_Identity',
    dendrogram=False,
)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)

# Dot plot of the remaining probes (position 66 onwards), same grouping.
sc.pl.dotplot(
    adata,
    var_names=probes.index[66:],
    groupby='Manuscript_Identity',
    dendrogram=False,
)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)

SPAPROS with hand-selected genes

We already have some Xenium runs in which certain probesets have reasonable average detection and show up as highly variable genes when the Xenium data are clustered. I therefore regard these genes as presumptively high-performing, informative genes. Including them in the diversity panel will mitigate the risk of selecting genes whose probesets perform poorly for technical reasons. We therefore ask SPAPROS to include these genes and to adjust its other selections accordingly.

# Count the hand-selected probeset genes held in the R session
length(probesets_selected)
[1] 72
# Pull the hand-selected genes over from the R session (`r` is presumably
# reticulate's handle to the R environment).
preselected_genes = r.probesets_selected
# Try to account for preselected genes that are also PCA genes: rank every gene
# with the PCA-based selection, drop the preselected ones, and take the largest
# rank among the 20 best-ranked remaining genes.
# NOTE(review): n_pca_genes is not passed to the ProbesetSelector call that
# follows in this document -- confirm that this is intentional.
tmp = sp.se.select_pca_genes(adata, n=adata.n_vars, inplace=False)["selection_ranking"]
n_pca_genes = (
    tmp.loc[~tmp.index.isin(preselected_genes)]
    .sort_values()
    .iloc[:20]
    .max()
    .astype(int)
)

Of the preselected genes, only 2 (S100A8 and AREG) rank among the top 100 PCA genes. So I think we are reasonably safe to just add more PCA genes on top of these.

# Build a selector for a 200-gene panel over the 'Manuscript_Identity' cell
# types, handing the hand-selected genes in via `preselected_genes`.
selector = sp.se.ProbesetSelector(
    adata,
    n=200,
    celltype_key="Manuscript_Identity",
    verbosity=0,
    save_dir=None,
    preselected_genes=preselected_genes,
)
Note: The following celltypes' test set sizes for forest training are below min_test_n (=20):
     Ionocyte : 6
     PNEC     : 12
The genes selected for those cell types potentially don't generalize well. Find the genes for each of those cell types in self.genes_of_primary_trees after running self.select_probeset().
# Run the probeset selection on the selector built with the preselected genes.
selector.select_probeset()
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(

Export probes for simulations

# Write the probeset rows whose `selection` flag is set to a CSV file.
selector.probeset.loc[selector.probeset['selection']].to_csv(
    path_or_buf='SPAPROS-200withXeniumCuration.csv'
)

Quick summary of selected probes

What is the distribution of chosen probes among methods?

# Draw the selector's gene-overlap figure (presumably the overlap of chosen
# genes between selection methods, per the section heading — confirm against
# the spapros documentation).
selector.plot_gene_overlap()
/home/vincent/.local/lib/python3.9/site-packages/spapros/plotting/plot.py:1241: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.
  plt.tight_layout()

# Names of the genes flagged as selected in this run
selector.probeset[selector.probeset.selection].index
Index(['IL1RL1', 'S100A8', 'VWF', 'IRF4', 'AREG', 'KRT7', 'KIT', 'KRT15',
       'PLA2G2A', 'SCEL',
       ...
       'PPP1R16B', 'BAG3', 'HSPA1A', 'HSPA1B', 'ENSG00000276085', 'CCL20',
       'MT1G', 'STAT4', 'CCL18', 'CCL2'],
      dtype='object', length=200)

Table of characteristics for selected probes

# Full characteristics table restricted to the selected genes; displayed as the chunk output
probe_candidates = selector.probeset.loc[selector.probeset.selection]
probe_candidates
        gene_nr  selection  ...  required_marker  required_list_marker
IL1RL1        1       True  ...             True                 False
S100A8        2       True  ...             True                 False
VWF           3       True  ...             True                 False
IRF4          4       True  ...             True                 False
AREG          5       True  ...             True                 False
...         ...        ...  ...              ...                   ...
CCL20       196       True  ...            False                 False
MT1G        197       True  ...            False                 False
STAT4       198       True  ...            False                 False
CCL18       199       True  ...            False                 False
CCL2        200       True  ...            False                 False

[200 rows x 17 columns]

Dotplots of probeset split by selection method

Cluster markers (DE 1vs all)

# Keep rows with a non-empty 'celltypes_DE_1vsall' entry, order them by 'celltypes_DE',
# and plot the first 40 genes
de_1vsall_mask = probe_candidates['celltypes_DE_1vsall'] != ''
probes = probe_candidates.loc[de_1vsall_mask].sort_values(by='celltypes_DE')
sc.pl.dotplot(adata, probes.index[:40], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)
# Remaining genes of the current `probes` table (position 40 onward)
sc.pl.dotplot(adata, probes.index[40:], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)

Nuanced cluster markers (celltypes_DE_specific)

# Keep rows with a non-empty 'celltypes_DE_specific' entry, order them by 'celltypes_DE',
# and plot the first 25 genes
de_specific_mask = probe_candidates['celltypes_DE_specific'] != ''
probes = probe_candidates.loc[de_specific_mask].sort_values(by='celltypes_DE')
sc.pl.dotplot(adata, probes.index[:25], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)
# Remaining genes of the current `probes` table (position 25 onward)
sc.pl.dotplot(adata, probes.index[25:], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)

PCA based (pca_selected)

# Keep rows flagged in the boolean 'pca_selected' column, order them by 'celltypes_DE',
# and plot the first 33 genes
pca_mask = probe_candidates['pca_selected']
probes = probe_candidates.loc[pca_mask].sort_values(by='celltypes_DE')
sc.pl.dotplot(adata, probes.index[:33], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)
# Genes at positions 33-65 of the current `probes` table
middle_genes = probes.index[33:66]
sc.pl.dotplot(adata, middle_genes, groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)

# Remaining genes of the current `probes` table (position 66 onward)
sc.pl.dotplot(adata, probes.index[66:], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)

SPAPROS with expression penalties

Penalties range from 0 to 1, where 0 is worst and 1 is best (i.e., no penalty). The behavior of penalties seems to depend on the source of the gene. If the gene is identified by the package, the penalty acts as a multiplier. If the gene comes from a manually curated list, all genes with any penalty are removed.

We were told by 10x that SCGB1A1 and SFTPC had too high abundance to design probes. COL1A1 is the highest single detected gene in trial runs; I am not aware of any codeword budget issues with COL1A1.

# One violin figure per gene group: first the very abundant genes discussed above,
# then a second group that includes KRT8 for comparison
for gene_group in (['SCGB1A1', 'SFTPC', 'COL1A1'], ['GDF15', 'CLDN4', 'KRT8']):
    sc.pl.violin(adata, gene_group)

If we wanted to be fairly aggressive, a threshold window of roughly 1 to 6 would get us genes that are expressed at about the level of COL1A1 or even higher. Some high-performance genes like KRT8 wouldn’t necessarily be selected.

# Thresholds handed to the plateau penalty kernels as x_min / x_max, and the factor
# that scales the kernels' `var` argument
lower_th = 1.0
upper_th = 6
FACTOR = 0.1

# Expression quantiles per gene. The adata.var columns read further down
# ('quantile_0.99' and 'quantile_0.9 expr > 0') are presumably written by these two calls.
for quantile, drop_zeros in ((0.99, False), (0.9, True)):
    sp.ut.get_expression_quantile(adata, q=quantile, normalise=False, log1p=False, zeros_to_nan=drop_zeros)

# Penalty kernels, keyed by direction and factor
penalty_fcts = {
    f"lower_{FACTOR}": sp.ut.plateau_penalty_kernel(var=0.1 * FACTOR, x_min=lower_th, x_max=None),
    f"upper_{FACTOR}": sp.ut.plateau_penalty_kernel(var=0.5*FACTOR, x_min=None, x_max=upper_th),
}

# Factor-suffixed column names for the per-gene penalty values
penalty_keys = [f"expr_penalty_lower_{FACTOR}",f"expr_penalty_upper_{FACTOR}"]
lower_key, upper_key = penalty_keys

# Evaluate each kernel on its quantile column to get one penalty value per gene
adata.var[lower_key] = penalty_fcts[f"lower_{FACTOR}"](adata.var['quantile_0.9 expr > 0'])
adata.var[upper_key] = penalty_fcts[f"upper_{FACTOR}"](adata.var['quantile_0.99'])

# Mirror the values under factor-independent names
adata.var["expr_penalty_lower"] = adata.var[lower_key]
adata.var["expr_penalty_upper"] = adata.var[upper_key]

Run the selection. In this run we have not specified a target number of genes (n=None); we are curious what it comes up with.

# Build the penalised selector; the panel size is left open (n=None).
# Both expression-penalty columns are passed as pca_penalties, DE_penalties and
# m_penalties_adata_celltypes; m_penalties_list_celltypes receives only the upper penalty.
expr_penalties = ["expr_penalty_lower", "expr_penalty_upper"]
selector_highexpression = sp.se.ProbesetSelector(
    adata,
    n=None,
    celltype_key="Manuscript_Identity",
    verbosity=1,
    save_dir=None,
    pca_penalties=list(expr_penalties),
    DE_penalties=list(expr_penalties),
    m_penalties_adata_celltypes=list(expr_penalties),
    m_penalties_list_celltypes=["expr_penalty_upper"],
)
# Run the selection
selector_highexpression.select_probeset()

Export probes for simulations

# Write the rows flagged as selected by the penalised run to CSV for the simulations
highexpr_rows = selector_highexpression.probeset.loc[selector_highexpression.probeset.selection]
highexpr_rows.to_csv('SPAPROS-highexpression.csv')

Quick summary of selected probes

What is the distribution of chosen probes among methods?

# Plot how the selected genes are distributed across the selection methods
selector_highexpression.plot_gene_overlap()
# list probes
highexpr_selected = selector_highexpression.probeset.selection
selector_highexpression.probeset.index[highexpr_selected]

Table of characteristics for selected probes

# Full characteristics table restricted to the genes selected by the penalised run;
# displayed as the chunk output
probe_candidates = selector_highexpression.probeset.loc[selector_highexpression.probeset.selection]
probe_candidates

Dotplots of probeset split by selection method

Cluster markers (DE 1vs all)

# Rows with a non-empty 'celltypes_DE_1vsall' entry, ordered by 'celltypes_DE'
probes = probe_candidates[(probe_candidates['celltypes_DE_1vsall']!='')]
probes = probes.sort_values('celltypes_DE')
# The penalised selector was created with n=None, so the number of genes in this category
# is not known in advance. Skip any slice that turns out to be empty rather than asking
# scanpy to draw a dotplot with no genes.
for gene_slice in (probes.index[0:40], probes.index[40::]):
    if len(gene_slice) > 0:
        sc.pl.dotplot(adata, gene_slice, groupby='Manuscript_Identity', dendrogram=False)

Nuanced cluster markers (celltypes_DE_specific)

# Rows with a non-empty 'celltypes_DE_specific' entry, ordered by 'celltypes_DE'
probes = probe_candidates[(probe_candidates['celltypes_DE_specific']!='')]
probes = probes.sort_values('celltypes_DE')
# The penalised selector was created with n=None, so the number of genes in this category
# is not known in advance. Skip any slice that turns out to be empty rather than asking
# scanpy to draw a dotplot with no genes.
for gene_slice in (probes.index[0:15], probes.index[15::]):
    if len(gene_slice) > 0:
        sc.pl.dotplot(adata, gene_slice, groupby='Manuscript_Identity', dendrogram=False)

PCA based (pca_selected)

# Rows flagged in the boolean 'pca_selected' column, ordered by 'celltypes_DE'
probes = probe_candidates[(probe_candidates['pca_selected'])]
probes = probes.sort_values('celltypes_DE')
# The penalised selector was created with n=None, so the number of PCA-selected genes
# is not known in advance. Skip any slice that turns out to be empty rather than asking
# scanpy to draw a dotplot with no genes.
for gene_slice in (probes.index[0:33], probes.index[33:66], probes.index[66::]):
    if len(gene_slice) > 0:
        sc.pl.dotplot(adata, gene_slice, groupby='Manuscript_Identity', dendrogram=False)

Probeset cross-correlation

How well are probes correlated with each other?

Probeset performance evaluation within SPAPROS

Set up evaluation.

# Workaround: at least one spapros code path appears to expect an obs column literally
# named "celltype", so mirror the cell-type annotation under that name as well.
adata.obs['celltype']=adata.obs['Manuscript_Identity']

# Instantiate the evaluator. `metrics` is given as a list (the documented type) instead of a
# set, so the order of the metrics is the same in every session.
# NOTE(review): spapros documents `metrics` as only being honoured when scheme="custom";
# with the default scheme this argument may be ignored -- confirm against the installed version.
evaluator = sp.ev.ProbesetEvaluator(
    adata,
    celltype_key='Manuscript_Identity',
    metrics=['cluster_similarity', 'knn_overlap', 'forest_clfs', 'marker_corr', 'gene_corr'],
    verbosity=2,
    results_dir=None,
)

Reference: trial lung panel

# Gene names of the trial Xenium lung panel, taken from the 'Gene' column of the order sheet
xenium_panel1_genes = pd.read_csv('Xenium_panel_order_08_10_23.csv')['Gene']
# Hand the evaluator a plain list of names rather than a pandas Series, as the
# high-expression evaluation call in this notebook already does.
# NOTE(review): panel genes missing from adata.var_names may need to be filtered out first -- confirm.
evaluator.evaluate_probeset(xenium_panel1_genes.tolist(), set_id="Xenium Lung Trial")

SPAPROS basic probeset

# Hand the evaluator a plain list of gene names rather than a pandas Index, as the
# high-expression evaluation call in this notebook already does.
# NOTE(review): `selector` here is the same object that was exported above as
# 'SPAPROS-200withXeniumCuration.csv', so the "SPAPROS basic" label may not describe
# the probeset actually being evaluated -- confirm which run is intended.
evaluator.evaluate_probeset(selector.probeset.index[selector.probeset.selection].tolist(), set_id="SPAPROS basic")

SPAPROS high expression minimal probeset

# Evaluate the genes selected by the expression-penalised selector
highexpr_probeset = selector_highexpression.probeset
highexpr_genes = highexpr_probeset.index[highexpr_probeset.selection].tolist()
evaluator.evaluate_probeset(highexpr_genes, set_id="SPAPROS high expression minimal")
# Summary table and summary plot for the evaluated probesets
evaluator.summary_statistics()
evaluator.plot_summary()
# vignette uses a different method, this is the one in the wrapper functions listing
evaluator.plot_marker_corr()
LS0tCnRpdGxlOiAiUHJvYmUgc2VsZWN0aW9uIGZvciBJUEYgbHVuZyB1c2luZyBTUEFQUk9TIgpvdXRwdXQ6CiAgZ2l0aHViX2RvY3VtZW50OgogICAgdG9jOiB0cnVlCiAgaHRtbF9ub3RlYm9vazoKICAgIHRvYzogdHJ1ZQotLS0gCgojIEVudmlyb25tZW50CgojIyBsb2FkIFIgZW52aXJvbm1lbnQKYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0KbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoU2V1cmF0KQpgYGAKCiMjIExvYWQgSVBGIENlbGwgQXRsYXMgZGF0YQpTcGFwcm9zIGlzIGV4cGVjdGluZyBzY2FucHkgZm9ybWF0IGRhdGEgd2l0aCByYXcgY291bnRzLiBMb2FkIGJhc2VkIG9uIGVzdGFibGlzaGVkIFIgd29ya2Zsb3cgdGhlbiBjb252ZXJ0LgojIyMgUmVhZCBpbiBHU0UxMzY4MzEKYGBge3J9Cm9yaWdpbmFsLmRpciA8LSAoJ34vdmNhbGFiLWZpbGVzL2RhdGFzZXRzL0dTRTEzNjgzMV9JUEZfQ2VsbF9BdGxhcy8nKQptZXRhZGF0YSA8LSByZWFkX3RzdihwYXN0ZTAob3JpZ2luYWwuZGlyLCdHU0UxMzY4MzFfQWxsQ2VsbHMuU2FtcGxlcy5DZWxsVHlwZS5NZXRhZGF0YVRhYmxlLnR4dC5neicpKQptZXRhZGF0YSA8LSBhcy5kYXRhLmZyYW1lKG1ldGFkYXRhKQpyb3duYW1lcyhtZXRhZGF0YSkgPC0gbWV0YWRhdGEkQ2VsbEJhcmNvZGVfSWRlbnRpdHkKCmZlYXR1cmVzIDwtIHJlYWRfdHN2KHBhc3RlMChvcmlnaW5hbC5kaXIsJ0dTRTEzNjgzMV9BbGxDZWxscy5HZW5lSURzLnR4dC5neicpKQpjZWxscyA8LSByZWFkX3RzdihwYXN0ZTAob3JpZ2luYWwuZGlyLCdHU0UxMzY4MzFfQWxsQ2VsbHMuY2VsbEJhcmNvZGVzLnR4dC5neicpLCBjb2xfbmFtZXMgPSBGKQoKY291bnRzIDwtIE1hdHJpeDo6cmVhZE1NKHBhc3RlMChvcmlnaW5hbC5kaXIsJ0dTRTEzNjgzMV9SYXdDb3VudHNfU3BhcnNlLm10eC5neicpKQpyb3duYW1lcyhjb3VudHMpIDwtIGZlYXR1cmVzJEhHTkNfRW5zZW1ibEFsdF9HZW5lSUQKY29sbmFtZXMoY291bnRzKSA8LSBjZWxscyRYMQpgYGAKYGBge3J9CmlwZmF0bGFzLmNlbGxzIDwtIENyZWF0ZVNldXJhdE9iamVjdChjb3VudHM9Y291bnRzLCBtZXRhLmRhdGE9bWV0YWRhdGEpCmBgYApgYGB7cn0Kcm0oY291bnRzLGNlbGxzLGZlYXR1cmVzLG1ldGFkYXRhKQpnYygpCmBgYAojIyMgU3RhbmRhcmQgZmlsdGVycwpgYGB7cn0KaXBmYXRsYXMuY2VsbHNbWydwZXJjZW50Lm10J11dIDwtIFBlcmNlbnRhZ2VGZWF0dXJlU2V0KGlwZmF0bGFzLmNlbGxzLCBwYXR0ZXJuPSdeTVQtJykKVmxuUGxvdChpcGZhdGxhcy5jZWxscywgZmVhdHVyZXMgPSBjKCJuRmVhdHVyZV9STkEiLCAibkNvdW50X1JOQSIsICJwZXJjZW50Lm10IiksIG5jb2wgPSAzLCBwdC5zaXplID0gMCkKYGBgCmBgYHtyfQppcGZhdGxhcy5jZWxscyA8LSBzdWJzZXQoaXBmYXRsYXMuY2VsbHMsIHN1YnNldCA9IG5GZWF0dXJlX1JOQSA+IDIwMCAmIG5GZWF0dXJlX1JOQSA8IDUwMDAgJiBwZXJjZW50Lm10IDwgMTApCmBgYAojIyMg
Q29udmVydCB0byBzY2FucHkgY29tcGF0aWJsZSBmb3JtYXQgKGFubmRhdGEpClRoaXMgaXMgcmVtYXJrYWJseSBub3QgYSBzb2x2ZWQgcHJvYmxlbSBidXQgc2NlYXN5IGhhcyBmdW5jdGlvbnMgZm9yIGl0LiBoYXZlIHRvIGJhY2stY29udmVydCB0aGUgU2V1cmF0NSBvYmplY3QgdG8gYSB2My1saWtlIGFzc2F5LgpgYGB7cn0KaXBmYXRsYXMuY2VsbHNbWydSTkEnXV0gPC0gYXMoaXBmYXRsYXMuY2VsbHNbWydSTkEnXV0sJ0Fzc2F5JykKc2NlYXN5Ojpjb252ZXJ0Rm9ybWF0KGlwZmF0bGFzLmNlbGxzLCBmcm9tPSdzZXVyYXQnLHRvPSdhbm5kYXRhJywgb3V0RmlsZSA9ICdpcGZhdGxhcy5hZGF0YScpCmBgYAoKCgojIyBsb2FkIHB5dGhvbiBlbnZpcm9ubWVudApXZWlyZCBiZWhhdmlvciB3aGVuIHJldGljdWxhdGUgaW5zdGFudGlhdGVkIGJ5IG90aGVyIHBhY2thZ2VzLCB0aGVyZWZvcmUgcmVzdGFydCBSLgpgYGB7cn0KLnJzLnJlc3RhcnRSKCkKYGBgClVzaW5nIHRoZSBTUEFQUk9TIHdvcmtmbG93IGRvY3VtZW50ZWQgYnkgdGhlaXMgbGFiIHdoaWNoIGlzIHB5dGhvbi9zY2FucHkgYmFzZWQuCmBgYHtweXRob259CmltcG9ydCBwYW5kYXMgYXMgcGQKaW1wb3J0IHNjYW5weSBhcyBzYwppbXBvcnQgc3BhcHJvcyBhcyBzcApzYy5zZXR0aW5ncy52ZXJib3NpdHk9MApzYy5sb2dnaW5nLnByaW50X2hlYWRlcigpCnByaW50KGYic3BhcHJvcz09e3NwLl9fdmVyc2lvbl9ffSIpCmBgYApBY2Nlc3MgYWRhdGEgb2JqZWN0IG9uIHRoZSBweXRob24gc2lkZS4gdG8gYXZvaWQgbmFtZSBjb252ZXJzaW9ucyB0aHJvdWdob3V0IHB5dGhvbiBjb2RlLCBqdXN0IG1ha2UgaXQgImFkYXRhLiIgVW5wcmVkaWN0YWJsZSBiZWhhdmlvciB3aGVuIHBvcnRpbmcgZnJvbSBSIHRvIHB5dGhvbiBlbnZpcm9ubWVudCwgc28gc3RvcmUgYXMgaDVhZCBhbmQgcmVhZCBiYWNrIGluLgpgYGB7cHl0aG9ufQphZGF0YSA9IHNjLnJlYWRfaDVhZCgnaXBmYXRsYXMuYWRhdGEnKQphZGF0YQpgYGAKIyBTUEFQUk9TIHByb2JlIHNlbGVjdGlvbgojIyBQcmVwcm9jZXNzIGRhdGEgd2l0aCBzY2FucHkKRmlsdGVyaW5nIGhhcyBhbHJlYWR5IGJlZW4gZG9uZSBvbiB0aGUgU2V1cmF0IHNpZGUuCmBgYHtweXRob259CnNjLnBwLm5vcm1hbGl6ZV90b3RhbChhZGF0YSwgdGFyZ2V0X3N1bT0xMDAwMCkgIyAxMGsgdGFyZ2V0IG1hdGNoZXMgZGVmYXVsdCBTZXVyYXQgc2NhbGUgZmFjdG9yCnNjLnBwLmxvZzFwKGFkYXRhKQpzYy5wcC5oaWdobHlfdmFyaWFibGVfZ2VuZXMoYWRhdGEsZmxhdm9yPSJjZWxsX3JhbmdlciIsbl90b3BfZ2VuZXM9MTAwMCkKc2MucHAucGNhKGFkYXRhLCBzdmRfc29sdmVyPSdsb2JwY2cnKSAjIGhhZCBpc3N1ZXMgd2l0aCBhcnBhY2sKc2MucHAubmVpZ2hib3JzKGFkYXRhLCBuX25laWdoYm9ycz0xMCwgbl9wY3M9MzApCnNjLnRsLnVtYXAoYWRhdGEpCnNjLnBsLnVtYXAoYWRhdGEsIGNvbG9yPVsnQ2VsbFR5cGVfQ2F0ZWdvcnknXSkKc2MucGwudW1h
cChhZGF0YSwgY29sb3I9WydEaXNlYXNlX0lkZW50aXR5J10pCmBgYAojIyBTUEFQUk9TIGJhc2ljOiBubyBjb25zdHJhaW50cy4KQXNrZWQgZm9yIDIwMCBnZW5lcywgYmVjYXVzZSBvbmUgaW1hZ2luZXMgdGhhdCB2YXJpb3VzIHN0YWtlaG9sZGVycyB3aWxsIGFkZCBhbm90aGVyIDEwMCBmb3IgSVBGIHN1YnNldHMsIGFuZCBwcm9iYWJseSBhc3RobWEgc3Rha2Vob2xkZXJzIHdpbGwgYWRkIGFub3RoZXIgMTAwIGJyaW5naW5nIHRvdGFsIHRvIDQwMC4gTm90ZSB0aGF0IHRoaXMgZGF0YXNldCBpbmNsdWRlcyBub3JtYWwgbHVuZyBhbmQgQ09QRCBsdW5nLiAKYGBge3B5dGhvbn0Kc2VsZWN0b3IgPSBzcC5zZS5Qcm9iZXNldFNlbGVjdG9yKGFkYXRhLCBuPTIwMCwgY2VsbHR5cGVfa2V5PSJNYW51c2NyaXB0X0lkZW50aXR5IiwgdmVyYm9zaXR5PTAsIHNhdmVfZGlyPU5vbmUpCnNlbGVjdG9yLnNlbGVjdF9wcm9iZXNldCgpCmBgYAojIyMgRXhwb3J0IHByb2JlcyBmb3Igc2ltdWxhdGlvbnMKYGBge3B5dGhvbn0Kc2VsZWN0b3IucHJvYmVzZXRbc2VsZWN0b3IucHJvYmVzZXQuc2VsZWN0aW9uXS50b19jc3YoJ1NQQVBST1MtYmFzaWMyMDAuY3N2JykKYGBgCgojIyMgUXVpY2sgc3VtbWFyeSBvZiBzZWxlY3RlZCBwcm9iZXMKV2hhdCBpcyB0aGUgZGlzdHJpYnV0aW9uIG9mIGNob3NlbiBwcm9iZXMgYW1vbmcgbWV0aG9kcz8KYGBge3B5dGhvbn0Kc2VsZWN0b3IucGxvdF9nZW5lX292ZXJsYXAoKQpgYGAKSSBkb24ndCB1bmRlcnN0YW5kIHdoYXQgdGhlIDR0aCBjb2x1bW4gaXMsIDMzIGdlbmVzIHRoYXQgaGF2ZSBubyBhdHRyaWJ1dGlvbiBmb3IgdGhlIG1ldGhvZCBvZiBzZWxlY3Rpb24/IApgYGB7cHl0aG9ufQojIGxpc3QgcHJvYmVzCnNlbGVjdG9yLnByb2Jlc2V0LmluZGV4W3NlbGVjdG9yLnByb2Jlc2V0LnNlbGVjdGlvbl0KYGBgClRhYmxlIG9mIGNoYXJhY3RlcmlzdGljcyBmb3Igc2VsZWN0ZWQgcHJvYmVzCmBgYHtweXRob259CnByb2JlX2NhbmRpZGF0ZXMgPSBzZWxlY3Rvci5wcm9iZXNldFtzZWxlY3Rvci5wcm9iZXNldC5zZWxlY3Rpb25dCnByb2JlX2NhbmRpZGF0ZXMKYGBgCiMjIyBEb3RwbG90cyBvZiBwcm9iZXNldCBzcGxpdCBieSBzZWxlY3Rpb24gbWV0aG9kCiMjIyMgQ2x1c3RlciBtYXJrZXJzIChERSAxdnMgYWxsKQpgYGB7cHl0aG9ufQpwcm9iZXMgPSBwcm9iZV9jYW5kaWRhdGVzWyhwcm9iZV9jYW5kaWRhdGVzWydjZWxsdHlwZXNfREVfMXZzYWxsJ10hPScnKV0KcHJvYmVzID0gcHJvYmVzLnNvcnRfdmFsdWVzKCdjZWxsdHlwZXNfREUnKQpzYy5wbC5kb3RwbG90KGFkYXRhLCBwcm9iZXMuaW5kZXhbMDo0MF0sIGdyb3VwYnk9J01hbnVzY3JpcHRfSWRlbnRpdHknLCBkZW5kcm9ncmFtPUZhbHNlKQpzYy5wbC5kb3RwbG90KGFkYXRhLCBwcm9iZXMuaW5kZXhbNDA6Ol0sIGdyb3VwYnk9J01hbnVzY3JpcHRfSWRlbnRpdHknLCBkZW5kcm9ncmFtPUZhbHNlKQpgYGAKIyMjIyBOdWFuY2VkIGNsdXN0ZXIgbWFya2Vy
cyAoY2VsbHR5cGVzX0RFX3NwZWNpZmljKQpgYGB7cHl0aG9ufQpwcm9iZXMgPSBwcm9iZV9jYW5kaWRhdGVzWyhwcm9iZV9jYW5kaWRhdGVzWydjZWxsdHlwZXNfREVfc3BlY2lmaWMnXSE9JycpXQpwcm9iZXMgPSBwcm9iZXMuc29ydF92YWx1ZXMoJ2NlbGx0eXBlc19ERScpCnNjLnBsLmRvdHBsb3QoYWRhdGEsIHByb2Jlcy5pbmRleFswOjI1XSwgZ3JvdXBieT0nTWFudXNjcmlwdF9JZGVudGl0eScsIGRlbmRyb2dyYW09RmFsc2UpCnNjLnBsLmRvdHBsb3QoYWRhdGEsIHByb2Jlcy5pbmRleFsyNTo6XSwgZ3JvdXBieT0nTWFudXNjcmlwdF9JZGVudGl0eScsIGRlbmRyb2dyYW09RmFsc2UpCmBgYAojIyMjIFBDQSBiYXNlZCAoY2VsbHR5cGVzX0RFX3NwZWNpZmljKQpgYGB7cHl0aG9ufQpwcm9iZXMgPSBwcm9iZV9jYW5kaWRhdGVzWyhwcm9iZV9jYW5kaWRhdGVzWydwY2Ffc2VsZWN0ZWQnXSldCnByb2JlcyA9IHByb2Jlcy5zb3J0X3ZhbHVlcygnY2VsbHR5cGVzX0RFJykKc2MucGwuZG90cGxvdChhZGF0YSwgcHJvYmVzLmluZGV4WzA6MzNdLCBncm91cGJ5PSdNYW51c2NyaXB0X0lkZW50aXR5JywgZGVuZHJvZ3JhbT1GYWxzZSkKc2MucGwuZG90cGxvdChhZGF0YSwgcHJvYmVzLmluZGV4WzMzOjY2XSwgZ3JvdXBieT0nTWFudXNjcmlwdF9JZGVudGl0eScsIGRlbmRyb2dyYW09RmFsc2UpCnNjLnBsLmRvdHBsb3QoYWRhdGEsIHByb2Jlcy5pbmRleFs2Njo6XSwgZ3JvdXBieT0nTWFudXNjcmlwdF9JZGVudGl0eScsIGRlbmRyb2dyYW09RmFsc2UpCmBgYAojIyBTUEFQUk9TIHdpdGggaGFuZC1zZWxlY3RlZCBnZW5lcwpXZSBhbHJlYWR5IGhhdmUgc29tZSB4ZW5pdW0gcnVucyB3aGVyZSBzb21lIHByb2Jlc2V0cyBoYXZlIHJlYXNvbmFibGUgYXZlcmFnZSBkZXRlY3Rpb24gYW5kIHNob3cgdXAgYXMgaGlnaGx5LXZhcmlhYmxlIGdlbmVzIGluIGNsdXN0ZXJpbmcgb2YgdGhlIFhlbml1bSBkYXRhLiBUaGVyZWZvcmUgSSByZWdhcmQgdGhlc2UgZ2VuZXMgYXMgcHJlc3VtcHRpdmVseSBoaWdoLXBlcmZvcm1hbmNlIGluZm9ybWF0aXZlIGdlbmVzLiBJbmNsdWRpbmcgdGhlc2UgaW4gdGhlIGRpdmVyc2l0eSBwYW5lbCB3aWxsIG1pdGlnYXRlIHRoZSByaXNrIG9mIHNlbGVjdGluZyBnZW5lcyB3aG9zZSBwcm9iZXNldHMgZG8gbm90IHBlcmZvcm0gd2VsbCBmb3IgdGVjaG5pY2FsIHJlYXNvbnMuIFRoZXJlZm9yZSBhc2sgU1BBUFJPUyB0byBpbmNsdWRlIHRoZXNlIGdlbmVzIGFuZCB0aGVuIG1vZGlmeSBpdHMgb3RoZXIgc2VsZWN0aW9ucyBhY2NvcmRpbmdseS4KYGBge3J9CnByb2Jlc2V0c194ZW5pdW1wZXJmb3JtYW5jZSA8LSByZWFkX2NzdihmaWxlPSdwcm9iZXNldHNfeGVuaXVtcGVyZm9ybWFuY2UuY3N2JykKcHJvYmVzZXRzX3hlbml1bXBlcmZvcm1hbmNlICU+JSBmaWx0ZXIoeGVuaXVtQXZnID4gMS4zNSkgJT4lIGZpbHRlcih2YXJpYW5jZS5zdGFuZGFyZGl6ZWQgPiAxLjIwKSAtPiBwcm9iZXNldHNfc2VsZWN0ZWQKcHJvYmVz
ZXRzX3NlbGVjdGVkIDwtIHByb2Jlc2V0c19zZWxlY3RlZCRnZW5lCmxlbmd0aChwcm9iZXNldHNfc2VsZWN0ZWQpCmBgYApgYGB7cHl0aG9ufQpwcmVzZWxlY3RlZF9nZW5lcz1yLnByb2Jlc2V0c19zZWxlY3RlZApgYGAKYGBge3B5dGhvbn0KIyBUcnkgdG8gYWNjb3VudCBmb3Igc2VsZWN0ZWQgZ2VuZXMgdGhhdCBhcmUgaW4gUENBIGdlbmVzLgp0bXAgPSBzcC5zZS5zZWxlY3RfcGNhX2dlbmVzKGFkYXRhLCBuPWFkYXRhLm5fdmFycywgaW5wbGFjZT1GYWxzZSlbInNlbGVjdGlvbl9yYW5raW5nIl0Kbl9wY2FfZ2VuZXMgPSB0bXAubG9jW350bXAuaW5kZXguaXNpbihwcmVzZWxlY3RlZF9nZW5lcyldLnNvcnRfdmFsdWVzKCkuaWxvY1s6MjBdLm1heCgpLmFzdHlwZShpbnQpCmBgYApGb3IgUENBIHJhbmtpbmcgZ2VuZXMsIHRoZXJlIGFyZSBvbmx5IDIgKFMxMDBBOCBhbmQgQVJFRykgdGhhdCBhcmUgaW4gdGhlIHRvcCAxMDAgUENBIGdlbmVzLiBTbyBJIHRoaW5rIHdlIGFyZSByZWFzb25hYmx5IHNhZmUgdG8ganVzdCBhZGQgbW9yZSBQQ0EgZ2VuZXMgb24gdG9wIG9mIHRoZXNlLgpgYGB7cHl0aG9ufQpzZWxlY3RvciA9IHNwLnNlLlByb2Jlc2V0U2VsZWN0b3IoYWRhdGEsIG49MjAwLCBjZWxsdHlwZV9rZXk9Ik1hbnVzY3JpcHRfSWRlbnRpdHkiLCB2ZXJib3NpdHk9MCwgc2F2ZV9kaXI9Tm9uZSwgcHJlc2VsZWN0ZWRfZ2VuZXM9cHJlc2VsZWN0ZWRfZ2VuZXMpCnNlbGVjdG9yLnNlbGVjdF9wcm9iZXNldCgpCmBgYAoKIyMjIEV4cG9ydCBwcm9iZXMgZm9yIHNpbXVsYXRpb25zCmBgYHtweXRob259CnNlbGVjdG9yLnByb2Jlc2V0W3NlbGVjdG9yLnByb2Jlc2V0LnNlbGVjdGlvbl0udG9fY3N2KCdTUEFQUk9TLTIwMHdpdGhYZW5pdW1DdXJhdGlvbi5jc3YnKQpgYGAKCiMjIyBRdWljayBzdW1tYXJ5IG9mIHNlbGVjdGVkIHByb2JlcwpXaGF0IGlzIHRoZSBkaXN0cmlidXRpb24gb2YgY2hvc2VuIHByb2JlcyBhbW9uZyBtZXRob2RzPwpgYGB7cHl0aG9ufQpzZWxlY3Rvci5wbG90X2dlbmVfb3ZlcmxhcCgpCmBgYApgYGB7cHl0aG9ufQojIGxpc3QgcHJvYmVzCnNlbGVjdG9yLnByb2Jlc2V0LmluZGV4W3NlbGVjdG9yLnByb2Jlc2V0LnNlbGVjdGlvbl0KYGBgClRhYmxlIG9mIGNoYXJhY3RlcmlzdGljcyBmb3Igc2VsZWN0ZWQgcHJvYmVzCmBgYHtweXRob259CnByb2JlX2NhbmRpZGF0ZXMgPSBzZWxlY3Rvci5wcm9iZXNldFtzZWxlY3Rvci5wcm9iZXNldC5zZWxlY3Rpb25dCnByb2JlX2NhbmRpZGF0ZXMKYGBgCiMjIyBEb3RwbG90cyBvZiBwcm9iZXNldCBzcGxpdCBieSBzZWxlY3Rpb24gbWV0aG9kCiMjIyMgQ2x1c3RlciBtYXJrZXJzIChERSAxdnMgYWxsKQpgYGB7cHl0aG9ufQpwcm9iZXMgPSBwcm9iZV9jYW5kaWRhdGVzWyhwcm9iZV9jYW5kaWRhdGVzWydjZWxsdHlwZXNfREVfMXZzYWxsJ10hPScnKV0KcHJvYmVzID0gcHJvYmVzLnNvcnRfdmFsdWVzKCdjZWxsdHlwZXNfREUnKQpzYy5wbC5kb3RwbG90KGFkYXRhLCBwcm9i
ZXMuaW5kZXhbMDo0MF0sIGdyb3VwYnk9J01hbnVzY3JpcHRfSWRlbnRpdHknLCBkZW5kcm9ncmFtPUZhbHNlKQpzYy5wbC5kb3RwbG90KGFkYXRhLCBwcm9iZXMuaW5kZXhbNDA6Ol0sIGdyb3VwYnk9J01hbnVzY3JpcHRfSWRlbnRpdHknLCBkZW5kcm9ncmFtPUZhbHNlKQpgYGAKIyMjIyBOdWFuY2VkIGNsdXN0ZXIgbWFya2VycyAoY2VsbHR5cGVzX0RFX3NwZWNpZmljKQpgYGB7cHl0aG9ufQpwcm9iZXMgPSBwcm9iZV9jYW5kaWRhdGVzWyhwcm9iZV9jYW5kaWRhdGVzWydjZWxsdHlwZXNfREVfc3BlY2lmaWMnXSE9JycpXQpwcm9iZXMgPSBwcm9iZXMuc29ydF92YWx1ZXMoJ2NlbGx0eXBlc19ERScpCnNjLnBsLmRvdHBsb3QoYWRhdGEsIHByb2Jlcy5pbmRleFswOjI1XSwgZ3JvdXBieT0nTWFudXNjcmlwdF9JZGVudGl0eScsIGRlbmRyb2dyYW09RmFsc2UpCnNjLnBsLmRvdHBsb3QoYWRhdGEsIHByb2Jlcy5pbmRleFsyNTo6XSwgZ3JvdXBieT0nTWFudXNjcmlwdF9JZGVudGl0eScsIGRlbmRyb2dyYW09RmFsc2UpCmBgYAojIyMjIFBDQSBiYXNlZCAoY2VsbHR5cGVzX0RFX3NwZWNpZmljKQpgYGB7cHl0aG9ufQpwcm9iZXMgPSBwcm9iZV9jYW5kaWRhdGVzWyhwcm9iZV9jYW5kaWRhdGVzWydwY2Ffc2VsZWN0ZWQnXSldCnByb2JlcyA9IHByb2Jlcy5zb3J0X3ZhbHVlcygnY2VsbHR5cGVzX0RFJykKc2MucGwuZG90cGxvdChhZGF0YSwgcHJvYmVzLmluZGV4WzA6MzNdLCBncm91cGJ5PSdNYW51c2NyaXB0X0lkZW50aXR5JywgZGVuZHJvZ3JhbT1GYWxzZSkKc2MucGwuZG90cGxvdChhZGF0YSwgcHJvYmVzLmluZGV4WzMzOjY2XSwgZ3JvdXBieT0nTWFudXNjcmlwdF9JZGVudGl0eScsIGRlbmRyb2dyYW09RmFsc2UpCnNjLnBsLmRvdHBsb3QoYWRhdGEsIHByb2Jlcy5pbmRleFs2Njo6XSwgZ3JvdXBieT0nTWFudXNjcmlwdF9JZGVudGl0eScsIGRlbmRyb2dyYW09RmFsc2UpCmBgYAoKIyMgU1BBUFJPUyB3aXRoIGV4cHJlc3Npb24gcGVuYWx0aWVzClBlbmFsdGllcyBhcmUgMCB0byAxLCB3aGVyZSAwIGlzIGJhZCBhbmQgMSBpcyBiZXN0IChpLmUuLCBubyBwZW5hbHR5KS4gQmVoYXZpb3Igb2YgcGVuYWx0aWVzIHNlZW1zIHRvIGRlcGVuZCBvbiB0aGUgdGhlIHNvdXJjZS4gSWYgaWRlbnRpZmllZCBieSB0aGUgcGFja2FnZSwgaXQncyBhIG11bHRpcGxpZXIuIElmIHRoZSBzb3VyY2UgaXMgZnJvbSBhIG1hbnVhbGx5IGN1cmF0ZWQgbGlzdCwgYWxsIGdlbmVzIHdpdGggKmFueSogcGVuYWx0eSBpcyByZW1vdmVkLgoKV2Ugd2VyZSB0b2xkIGJ5IDEweCB0aGF0IFNDR0IxQTEgYW5kIFNGVFBDIGhhZCB0b28gaGlnaCBhYnVuZGFuY2UgdG8gZGVzaWduIHByb2Jlcy4gQ09MMUExIGlzIHRoZSBoaWdoZXN0IHNpbmdsZSBkZXRlY3RlZCBnZW5lIGluIHRyaWFsIHJ1bnM7IEkgYW0gbm90IGF3YXJlIG9mIGFueSBjb2Rld29yZCBidWRnZXQgaXNzdWVzIHdpdGggQ09MMUExLgoKYGBge3B5dGhvbn0Kc2MucGwudmlvbGluKGFkYXRhLCBbJ1ND
R0IxQTEnLCAnU0ZUUEMnLCAnQ09MMUExJ10pCmBgYApgYGB7cHl0aG9ufQpzYy5wbC52aW9saW4oYWRhdGEsIFsnR0RGMTUnLCAnQ0xETjQnLCAnS1JUOCddKQpgYGAKSWYgd2Ugd2FudGVkIHRvIGJlIGZhaXJseSBhZ2dyZXNzaXZlLCBhIHRocmVzaG9sZCBvZiBsaWtlIDEgdG8gNiB3b3VsZCBnZXQgdXMgZ2VuZXMgdGhhdCBhcmUgbGlrZSBDT0wxQTEgb3IgZXZlbiBtb3JlIGhpZ2hseSBleHByZXNzZWQuIFNvbWUgaGlnaCBwZXJmb3JtYW5jZSBnZW5lcyBsaWtlIEtSVDggd291bGRuJ3QgbmVjZXNzYXJpbHkgYmUgc2VsZWN0ZWQuCmBgYHtweXRob259CiMgU2V0IHRocmVzaG9sZHMKbG93ZXJfdGggPSAxLjAKdXBwZXJfdGggPSA2CkZBQ1RPUiA9IDAuMQoKIyBDYWxjdWxhdGUgcXVhbnRpbGVzCnNwLnV0LmdldF9leHByZXNzaW9uX3F1YW50aWxlKGFkYXRhLCBxPTAuOTksIG5vcm1hbGlzZT1GYWxzZSwgbG9nMXA9RmFsc2UsIHplcm9zX3RvX25hbj1GYWxzZSkKc3AudXQuZ2V0X2V4cHJlc3Npb25fcXVhbnRpbGUoYWRhdGEsIHE9MC45LCBub3JtYWxpc2U9RmFsc2UsIGxvZzFwPUZhbHNlLCB6ZXJvc190b19uYW49VHJ1ZSkKCiMgR2V0IHBlbmFsdHkgZnVuY3Rpb25zIGZvciBnaXZlbiBmYWN0b3IKcGVuYWx0eV9mY3RzPXt9CnBlbmFsdHlfZmN0c1tmImxvd2VyX3tGQUNUT1J9Il0gPSBzcC51dC5wbGF0ZWF1X3BlbmFsdHlfa2VybmVsKHZhcj0wLjEgKiBGQUNUT1IsIHhfbWluPWxvd2VyX3RoLCB4X21heD1Ob25lKQpwZW5hbHR5X2ZjdHNbZiJ1cHBlcl97RkFDVE9SfSJdID0gc3AudXQucGxhdGVhdV9wZW5hbHR5X2tlcm5lbCh2YXI9MC41KkZBQ1RPUiwgeF9taW49Tm9uZSwgeF9tYXg9dXBwZXJfdGgpCiMgQ2FsY3VsYXRlIGVhY2ggZ2VuZSdzIHBlbmFsdHkgdmFsdWUKYWRhdGEudmFyW2YiZXhwcl9wZW5hbHR5X2xvd2VyX3tGQUNUT1J9Il0gPSBwZW5hbHR5X2ZjdHNbZiJsb3dlcl97RkFDVE9SfSJdKGFkYXRhLnZhclsncXVhbnRpbGVfMC45IGV4cHIgPiAwJ10pCmFkYXRhLnZhcltmImV4cHJfcGVuYWx0eV91cHBlcl97RkFDVE9SfSJdID0gcGVuYWx0eV9mY3RzW2YidXBwZXJfe0ZBQ1RPUn0iXShhZGF0YS52YXJbJ3F1YW50aWxlXzAuOTknXSkKIyBQQ0EgYW5kIERFIHNlbGVjdGlvbnMgd2l0aCBwZW5hbHRpZXMKcGVuYWx0eV9rZXlzID0gW2YiZXhwcl9wZW5hbHR5X2xvd2VyX3tGQUNUT1J9IixmImV4cHJfcGVuYWx0eV91cHBlcl97RkFDVE9SfSJdCgphZGF0YS52YXJbImV4cHJfcGVuYWx0eV9sb3dlciJdID0gYWRhdGEudmFyW2YiZXhwcl9wZW5hbHR5X2xvd2VyX3tGQUNUT1J9Il0KYWRhdGEudmFyWyJleHByX3BlbmFsdHlfdXBwZXIiXSA9IGFkYXRhLnZhcltmImV4cHJfcGVuYWx0eV91cHBlcl97RkFDVE9SfSJdCgpgYGAKUnVuIHRoZSBzZWxlY3Rpb24uIEluIHRoaXMgcnVuIHdlIGhhdmUgbm90IHNlZWRlZCBhbnkgc3BlY2lmaWMgbnVtYmVyIG9mIGdlbmVzLCBjdXJpb3VzIHdoYXQgaXQgY29tZXMgdXAgd2l0aC4K
YGBge3B5dGhvbn0KIyBjcmVhdGUgYW4gaW5zdGFuY2Ugb2YgdGhlIFByb2Jlc2V0U2VsZWN0b3IgY2xhc3MKc2VsZWN0b3JfaGlnaGV4cHJlc3Npb24gPSBzcC5zZS5Qcm9iZXNldFNlbGVjdG9yKAogICAgYWRhdGEsCiAgICBuPU5vbmUsCiAgICBjZWxsdHlwZV9rZXk9Ik1hbnVzY3JpcHRfSWRlbnRpdHkiLAogICAgdmVyYm9zaXR5PTEsCiAgICBzYXZlX2Rpcj1Ob25lLAogICAgcGNhX3BlbmFsdGllcz1bImV4cHJfcGVuYWx0eV9sb3dlciIsICJleHByX3BlbmFsdHlfdXBwZXIiXSwKICAgIERFX3BlbmFsdGllcz1bImV4cHJfcGVuYWx0eV9sb3dlciIsICJleHByX3BlbmFsdHlfdXBwZXIiXSwKICAgIG1fcGVuYWx0aWVzX2FkYXRhX2NlbGx0eXBlcz1bImV4cHJfcGVuYWx0eV9sb3dlciIsICJleHByX3BlbmFsdHlfdXBwZXIiXSwKICAgIG1fcGVuYWx0aWVzX2xpc3RfY2VsbHR5cGVzPVsiZXhwcl9wZW5hbHR5X3VwcGVyIl0sCikKCmBgYApgYGB7cHl0aG9ufQpzZWxlY3Rvcl9oaWdoZXhwcmVzc2lvbi5zZWxlY3RfcHJvYmVzZXQoKQpgYGAKIyMjIEV4cG9ydCBwcm9iZXMgZm9yIHNpbXVsYXRpb25zCmBgYHtweXRob259CnNlbGVjdG9yX2hpZ2hleHByZXNzaW9uLnByb2Jlc2V0W3NlbGVjdG9yX2hpZ2hleHByZXNzaW9uLnByb2Jlc2V0LnNlbGVjdGlvbl0udG9fY3N2KCdTUEFQUk9TLWhpZ2hleHByZXNzaW9uLmNzdicpCmBgYAoKIyMjIFF1aWNrIHN1bW1hcnkgb2Ygc2VsZWN0ZWQgcHJvYmVzCldoYXQgaXMgdGhlIGRpc3RyaWJ1dGlvbiBvZiBjaG9zZW4gcHJvYmVzIGFtb25nIG1ldGhvZHM/CmBgYHtweXRob259CnNlbGVjdG9yX2hpZ2hleHByZXNzaW9uLnBsb3RfZ2VuZV9vdmVybGFwKCkKYGBgCgpgYGB7cHl0aG9ufQojIGxpc3QgcHJvYmVzCnNlbGVjdG9yX2hpZ2hleHByZXNzaW9uLnByb2Jlc2V0LmluZGV4W3NlbGVjdG9yX2hpZ2hleHByZXNzaW9uLnByb2Jlc2V0LnNlbGVjdGlvbl0KYGBgClRhYmxlIG9mIGNoYXJhY3RlcmlzdGljcyBmb3Igc2VsZWN0ZWQgcHJvYmVzCmBgYHtweXRob259CnByb2JlX2NhbmRpZGF0ZXMgPSBzZWxlY3Rvcl9oaWdoZXhwcmVzc2lvbi5wcm9iZXNldFtzZWxlY3Rvcl9oaWdoZXhwcmVzc2lvbi5wcm9iZXNldC5zZWxlY3Rpb25dCnByb2JlX2NhbmRpZGF0ZXMKYGBgCiMjIyBEb3RwbG90cyBvZiBwcm9iZXNldCBzcGxpdCBieSBzZWxlY3Rpb24gbWV0aG9kCiMjIyMgQ2x1c3RlciBtYXJrZXJzIChERSAxdnMgYWxsKQpgYGB7cHl0aG9ufQpwcm9iZXMgPSBwcm9iZV9jYW5kaWRhdGVzWyhwcm9iZV9jYW5kaWRhdGVzWydjZWxsdHlwZXNfREVfMXZzYWxsJ10hPScnKV0KcHJvYmVzID0gcHJvYmVzLnNvcnRfdmFsdWVzKCdjZWxsdHlwZXNfREUnKQpzYy5wbC5kb3RwbG90KGFkYXRhLCBwcm9iZXMuaW5kZXhbMDo0MF0sIGdyb3VwYnk9J01hbnVzY3JpcHRfSWRlbnRpdHknLCBkZW5kcm9ncmFtPUZhbHNlKQpzYy5wbC5kb3RwbG90KGFkYXRhLCBwcm9iZXMuaW5kZXhbNDA6Ol0sIGdyb3VwYnk9J01h
bnVzY3JpcHRfSWRlbnRpdHknLCBkZW5kcm9ncmFtPUZhbHNlKQpgYGAKIyMjIyBOdWFuY2VkIGNsdXN0ZXIgbWFya2VycyAoY2VsbHR5cGVzX0RFX3NwZWNpZmljKQpgYGB7cHl0aG9ufQpwcm9iZXMgPSBwcm9iZV9jYW5kaWRhdGVzWyhwcm9iZV9jYW5kaWRhdGVzWydjZWxsdHlwZXNfREVfc3BlY2lmaWMnXSE9JycpXQpwcm9iZXMgPSBwcm9iZXMuc29ydF92YWx1ZXMoJ2NlbGx0eXBlc19ERScpCnNjLnBsLmRvdHBsb3QoYWRhdGEsIHByb2Jlcy5pbmRleFswOjE1XSwgZ3JvdXBieT0nTWFudXNjcmlwdF9JZGVudGl0eScsIGRlbmRyb2dyYW09RmFsc2UpCnNjLnBsLmRvdHBsb3QoYWRhdGEsIHByb2Jlcy5pbmRleFsxNTo6XSwgZ3JvdXBieT0nTWFudXNjcmlwdF9JZGVudGl0eScsIGRlbmRyb2dyYW09RmFsc2UpCmBgYAojIyMjIFBDQSBiYXNlZCAoY2VsbHR5cGVzX0RFX3NwZWNpZmljKQpgYGB7cHl0aG9ufQpwcm9iZXMgPSBwcm9iZV9jYW5kaWRhdGVzWyhwcm9iZV9jYW5kaWRhdGVzWydwY2Ffc2VsZWN0ZWQnXSldCnByb2JlcyA9IHByb2Jlcy5zb3J0X3ZhbHVlcygnY2VsbHR5cGVzX0RFJykKc2MucGwuZG90cGxvdChhZGF0YSwgcHJvYmVzLmluZGV4WzA6MzNdLCBncm91cGJ5PSdNYW51c2NyaXB0X0lkZW50aXR5JywgZGVuZHJvZ3JhbT1GYWxzZSkKc2MucGwuZG90cGxvdChhZGF0YSwgcHJvYmVzLmluZGV4WzMzOjY2XSwgZ3JvdXBieT0nTWFudXNjcmlwdF9JZGVudGl0eScsIGRlbmRyb2dyYW09RmFsc2UpCnNjLnBsLmRvdHBsb3QoYWRhdGEsIHByb2Jlcy5pbmRleFs2Njo6XSwgZ3JvdXBieT0nTWFudXNjcmlwdF9JZGVudGl0eScsIGRlbmRyb2dyYW09RmFsc2UpCmBgYAojIyMjIFByb2JzZXQgY3Jvc3MtY29ycmVsYXRpb24KSG93IHdlbGwgYXJlIHByb2JlcyBjb3JyZWxhdGVkIHdpdGggZWFjaCBvdGhlcj8KYGBge3B5dGhvbn0KCmBgYAoKCiMgUHJvYmVzZXQgcGVyZm9ybWFuY2UgZXZhbHVhdGlvbiB3aXRoaW4gU1BBUFJPUwpTZXQgdXAgZXZhbHVhdGlvbi4KYGBge3B5dGhvbn0KIyBJIHRoaW5rIHRoZXJlIGlzIGF0IGxlYXN0IG9uZSBidWcgaW4gdGhlIGNvZGUgdGhhdCBleHBlY3RzICJjZWxsdHlwZSIgYXMgYW4gZWxlbWVudC4KYWRhdGEub2JzWydjZWxsdHlwZSddPWFkYXRhLm9ic1snTWFudXNjcmlwdF9JZGVudGl0eSddCgojIGluc3RhbnRpYXRlIGV2YWx1YXRvcgpldmFsdWF0b3IgPSBzcC5ldi5Qcm9iZXNldEV2YWx1YXRvcihhZGF0YSwgY2VsbHR5cGVfa2V5PSdNYW51c2NyaXB0X0lkZW50aXR5JywgbWV0cmljcz17J2NsdXN0ZXJfc2ltaWxhcml0eScsJ2tubl9vdmVybGFwJywnZm9yZXN0X2NsZnMnLCdtYXJrZXJfY29ycicsJ2dlbmVfY29ycid9LCB2ZXJib3NpdHk9MiwgcmVzdWx0c19kaXI9Tm9uZSkKYGBgCgojIyMjIFJlZmVyZW5jZTogdHJpYWwgbHVuZyBwYW5lbApgYGB7cHl0aG9ufQp4ZW5pdW1fcGFuZWwxX2dlbmVzID0gcGQucmVhZF9jc3YoJ1hlbml1bV9wYW5lbF9vcmRlcl8wOF8xMF8yMy5jc3Yn
KVsnR2VuZSddCmBgYApgYGB7cHl0aG9uLCBldmFsPUZ9CmV2YWx1YXRvci5ldmFsdWF0ZV9wcm9iZXNldCh4ZW5pdW1fcGFuZWwxX2dlbmVzLCBzZXRfaWQ9Ilhlbml1bSBMdW5nIFRyaWFsIikKYGBgCiMjIyMgU1BBUFJPUyBiYXNpYyBwcm9iZXNldApgYGB7cHl0aG9uLCBldmFsPUZ9CmV2YWx1YXRvci5ldmFsdWF0ZV9wcm9iZXNldChzZWxlY3Rvci5wcm9iZXNldC5pbmRleFtzZWxlY3Rvci5wcm9iZXNldC5zZWxlY3Rpb25dLCBzZXRfaWQ9IlNQQVBST1MgYmFzaWMiKQpgYGAKIyMjIyBTUEFQUk9TIGhpZ2ggZXhwcmVzc2lvbiBtaW5pbWFsIHByb2Jlc2V0CmBgYHtweXRob259CmV2YWx1YXRvci5ldmFsdWF0ZV9wcm9iZXNldChzZWxlY3Rvcl9oaWdoZXhwcmVzc2lvbi5wcm9iZXNldC5pbmRleFtzZWxlY3Rvcl9oaWdoZXhwcmVzc2lvbi5wcm9iZXNldC5zZWxlY3Rpb25dLnRvbGlzdCgpLCBzZXRfaWQ9IlNQQVBST1MgaGlnaCBleHByZXNzaW9uIG1pbmltYWwiKQpgYGAKYGBge3B5dGhvbn0KZXZhbHVhdG9yLnN1bW1hcnlfc3RhdGlzdGljcygpCmV2YWx1YXRvci5wbG90X3N1bW1hcnkoKQpgYGAKCmBgYHtweXRob259CiMgdmlnbmV0dGUgdXNlcyBhIGRpZmZlcmVudCBtZXRob2QsIHRoaXMgaXMgdGhlIG9uZSBpbiB0aGUgd3JhcHBlciBmdW5jdGlvbnMgbGlzdGluZwpldmFsdWF0b3IucGxvdF9tYXJrZXJfY29ycigpCmBgYAoK